Below you can find some of the code created during the #TidyTuesday night by the R-Ladies Melbourne members across all level or R experience!

Philip Newall #TidyTuesday: some of the very first attempts using R!!

library(tidyverse)
library(psych)

measles <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-25/measles.csv')

describe(measles) #from psych package
##          vars     n    mean      sd  median trimmed     mad     min     max
## index       1 66113 1607.69 1706.77  997.00 1274.49 1022.99    1.00 8066.00
## state*      2 66113     NaN      NA      NA     NaN      NA     Inf    -Inf
## year*       3 66113 2017.00    0.00 2017.00 2017.00    0.00 2017.00 2017.00
## name*       4 66113    2.00      NA    2.00    2.00    0.00    2.00    2.00
## type*       5 29491     NaN      NA      NA     NaN      NA     Inf    -Inf
## city*       6 46042     NaN      NA      NA     NaN      NA     Inf    -Inf
## county*     7 59859     NaN      NA      NA     NaN      NA     Inf    -Inf
## district    8     0     NaN      NA      NA     NaN      NA     Inf    -Inf
## enroll      9 49853  131.93  162.16   80.00   96.41   56.34    0.00 6222.00
## mmr        10 66113   63.17   45.72   95.00   66.59    7.10   -1.00  100.00
## overall    11 66113   54.09   46.67   87.00   55.30   17.79   -1.00  100.00
## xrel       12   109     NaN      NA      NA     NaN      NA     Inf    -Inf
## xmed       13 20991    2.91    3.85    2.00    2.23    1.48    0.04  100.00
## xper       14  8553    6.78    7.62    5.00    5.35    3.32    0.17  169.23
## lat        15 64564   39.15    4.58   40.21   39.32    4.15   24.55   49.00
## lng        16 64564  -96.28   18.44  -89.97  -96.16   21.85 -124.50   80.21
##            range  skew kurtosis   se
## index    8065.00  1.73     2.64 6.64
## state*      -Inf    NA       NA   NA
## year*       0.00   NaN      NaN 0.00
## name*       0.00    NA       NA   NA
## type*       -Inf    NA       NA   NA
## city*       -Inf    NA       NA   NA
## county*     -Inf    NA       NA   NA
## district    -Inf    NA       NA   NA
## enroll   6222.00  3.86    52.69 0.73
## mmr       101.00 -0.66    -1.52 0.18
## overall   101.00 -0.30    -1.86 0.18
## xrel        -Inf    NA       NA   NA
## xmed       99.96  6.36    80.49 0.03
## xper      169.06  5.12    50.16 0.08
## lat        24.45 -0.49     0.03 0.02
## lng       204.70  0.34     2.89 0.07
ggplot(measles, aes(x=mmr,y=overall))+
  geom_point(color="red")

Nyssa Silbiger #TidyTuesday

if (!require('tidyverse')) install.packages('tidyverse')
if (!require('geofacet')) install.packages('geofacet')
remotes::install_github("wilkelab/ggtext")
if (!require('hrbrthemes')) install.packages('hrbrthemes')
remotes::install_git("https://git.rud.is/hrbrmstr/hrbrthemes.git")
if (!require('extrafont')) install.packages('extrafont')
if (!require('skimr')) install.packages('skimr')

Note from R-Ladies Melbourne GitHub maintainer: Unfortunately, when assembling the code I had issue with installing the hrbrthemes which I therefore excluded below.

#load libraries
library(tidyverse)
library(geofacet)
library(ggtext)
# library(hrbrthemes)
library(extrafont)
library(skimr)
# Download the data
measles <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-25/measles.csv')
skimr::skim(measles)
Data summary
Name measles
Number of rows 66113
Number of columns 16
_______________________
Column type frequency:
character 6
logical 2
numeric 8
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
state 0 1.00 4 14 0 32 0
year 0 1.00 4 7 0 4 0
name 0 1.00 1 91 0 36129 0
type 36622 0.45 5 12 0 6 0
city 20071 0.70 2 43 0 5665 0
county 6254 0.91 3 21 0 1158 0

Variable type: logical

skim_variable n_missing complete_rate mean count
district 66113 0 NaN :
xrel 66004 0 1 TRU: 109

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
index 0 1.00 1607.69 1706.77 1.00 429.00 997.00 2133.00 8066.00 ▇▂▁▁▁
enroll 16260 0.75 131.93 162.16 0.00 46.00 80.00 129.00 6222.00 ▇▁▁▁▁
mmr 0 1.00 63.17 45.72 -1.00 -1.00 95.00 98.00 100.00 ▅▁▁▁▇
overall 0 1.00 54.09 46.67 -1.00 -1.00 87.00 96.10 100.00 ▆▁▁▁▇
xmed 45122 0.32 2.91 3.85 0.04 1.00 2.00 3.53 100.00 ▇▁▁▁▁
xper 57560 0.13 6.78 7.62 0.17 2.84 5.00 7.55 169.23 ▇▁▁▁▁
lat 1549 0.98 39.15 4.58 24.55 35.69 40.21 42.18 49.00 ▁▃▅▇▂
lng 1549 0.98 -96.28 18.44 -124.50 -117.63 -89.97 -81.75 80.21 ▇▃▁▁▁
summary(measles)
##      index         state               year               name          
##  Min.   :   1   Length:66113       Length:66113       Length:66113      
##  1st Qu.: 429   Class :character   Class :character   Class :character  
##  Median : 997   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :1608                                                           
##  3rd Qu.:2133                                                           
##  Max.   :8066                                                           
##                                                                         
##      type               city              county          district      
##  Length:66113       Length:66113       Length:66113       Mode:logical  
##  Class :character   Class :character   Class :character   NA's:66113    
##  Mode  :character   Mode  :character   Mode  :character                 
##                                                                         
##                                                                         
##                                                                         
##                                                                         
##      enroll            mmr            overall         xrel        
##  Min.   :   0.0   Min.   : -1.00   Min.   : -1.00   Mode:logical  
##  1st Qu.:  46.0   1st Qu.: -1.00   1st Qu.: -1.00   TRUE:109      
##  Median :  80.0   Median : 95.00   Median : 87.00   NA's:66004    
##  Mean   : 131.9   Mean   : 63.17   Mean   : 54.09                 
##  3rd Qu.: 129.0   3rd Qu.: 98.00   3rd Qu.: 96.10                 
##  Max.   :6222.0   Max.   :100.00   Max.   :100.00                 
##  NA's   :16260                                                    
##       xmed             xper             lat             lng         
##  Min.   :  0.04   Min.   :  0.17   Min.   :24.55   Min.   :-124.50  
##  1st Qu.:  1.00   1st Qu.:  2.84   1st Qu.:35.69   1st Qu.:-117.63  
##  Median :  2.00   Median :  5.00   Median :40.21   Median : -89.97  
##  Mean   :  2.91   Mean   :  6.78   Mean   :39.15   Mean   : -96.28  
##  3rd Qu.:  3.53   3rd Qu.:  7.55   3rd Qu.:42.18   3rd Qu.: -81.75  
##  Max.   :100.00   Max.   :169.23   Max.   :49.00   Max.   :  80.21  
##  NA's   :45122    NA's   :57560    NA's   :1549    NA's   :1549
## get percentage of students that did not get the mmr shot for religious, medical, or personal reasons accross all states
measles_df <- measles %>%
  filter(mmr >0) %>% #removed schools with no reported value (-1)
  mutate(xrel = as.numeric(xrel))%>% # it is showing up as a logical instead of a value
  replace_na(list(xmed = 0, xrel = 0, xper = 0, enroll = 0, overall = 0, mmr = 0)) %>% # replace all NA with 0
  dplyr::select(-lat, -lng, -index, -enroll, -overall) %>% #remove lat and long
  pivot_longer(cols = xrel:xper, names_to = "Reason", values_to = "percentage") %>% # have all the reasons together for the barplot below
  group_by(state, Reason) %>%
  summarise_if(is.numeric, list(~mean(.), ~sd(.)/sqrt(n()))) %>% # get means and SE
  rename(SE = `percentage_/`)
glimpse(measles_df)
## Observations: 63
## Variables: 6
## Groups: state [21]
## $ state           <chr> "Arizona", "Arizona", "Arizona", "Arkansas", "Arkansa…
## $ Reason          <chr> "xmed", "xper", "xrel", "xmed", "xper", "xrel", "xmed…
## $ mmr_mean        <dbl> 92.63085, 92.63085, 92.63085, 80.49296, 80.49296, 80.…
## $ percentage_mean <dbl> 0.28127242, 6.14345858, 0.00000000, 0.00000000, 0.000…
## $ `mmr_/`         <dbl> 0.26844577, 0.26844577, 0.26844577, 0.36855464, 0.368…
## $ SE              <dbl> 0.0253427950, 0.2329142840, 0.0000000000, 0.000000000…
# rename the SE column
ggplot(measles_df,aes(x = Reason, y = percentage_mean, color = Reason, fill = Reason))+
  geom_bar(stat = 'identity')+
  geom_errorbar(aes(ymin = percentage_mean - SE, ymax = percentage_mean+SE), color = "white", width = 0)+
 # theme_ft_rc() + # black background theme
  labs(caption = "Plot by N. Silbiger \n@nsilbiger \nData by the Wallstreet Journal")+
  theme(axis.title.x=element_blank(),  # remove xlabels
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        #plot.title = element_markdown(lineheight = 1.1)
        
        legend.position = "none")+
  xlab("")+ylab("")+
  labs(title = "Mean prcentage of students that refused vaccines due to  medical , personal , or  religious")+
  facet_geo(~ state) # facet wrap it by state

measles_df2 <- measles %>%
  filter(mmr >0) %>% #removed schools with no reported value (-1)
  mutate(xrel = as.numeric(xrel))%>% # it is showing up as a logical instead of a value
  replace_na(list(xmed = 0, xrel = 0, xper = 0, enroll = 0, overall = 0, mmr = 0)) # replace all NA with 0
glimpse(measles_df2)
## Observations: 44,157
## Variables: 16
## $ index    <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 15, 1…
## $ state    <chr> "Arizona", "Arizona", "Arizona", "Arizona", "Arizona", "Ariz…
## $ year     <chr> "2018-19", "2018-19", "2018-19", "2018-19", "2018-19", "2018…
## $ name     <chr> "A J Mitchell Elementary", "Academy Del Sol", "Academy Del S…
## $ type     <chr> "Public", "Charter", "Charter", "Charter", "Charter", "Publi…
## $ city     <chr> "Nogales", "Tucson", "Tucson", "Phoenix", "Phoenix", "Phoeni…
## $ county   <chr> "Santa Cruz", "Pima", "Pima", "Maricopa", "Maricopa", "Maric…
## $ district <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ enroll   <dbl> 51, 22, 85, 60, 43, 36, 24, 22, 26, 78, 78, 35, 54, 54, 34, …
## $ mmr      <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, …
## $ overall  <dbl> -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, …
## $ xrel     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ xmed     <dbl> 0.00, 0.00, 0.00, 0.00, 2.33, 0.00, 0.00, 0.00, 0.00, 0.00, …
## $ xper     <dbl> 0.00, 0.00, 0.00, 0.00, 2.33, 0.00, 4.17, 0.00, 0.00, 0.00, …
## $ lat      <dbl> 31.34782, 32.22192, 32.13049, 33.48545, 33.49562, 33.43532, …
## $ lng      <dbl> -110.9380, -110.8961, -111.1170, -112.1306, -112.2247, -112.…
ggplot(measles_df2,aes(x = enroll, y = mmr, colour = state, fill = state))+
  geom_point( )

measles_df3 <- measles %>%
  filter(mmr >0) %>% #removed schools with no reported value (-1)
  mutate(xrel = as.numeric(xrel))%>% # it is showing up as a logical instead of a value
  replace_na(list(xmed = 0, xrel = 0, xper = 0, enroll = 0, overall = 0, mmr = 0)) %>%# replace all NA with 0
  group_by(type,state) %>% # group for visualisation
  summarise(enrollments = sum(enroll), avg_mmr = mean(mmr) ) %>%
  filter(type %in% list('Private','Public'))
glimpse(measles_df3)
## Observations: 14
## Variables: 4
## Groups: type [2]
## $ type        <chr> "Private", "Private", "Private", "Private", "Private", "P…
## $ state       <chr> "Arizona", "California", "Colorado", "Massachusetts", "Ne…
## $ enrollments <dbl> 2943, 60612, 2680, 0, 0, 24309, 4819, 66548, 1172146, 0, …
## $ avg_mmr     <dbl> 89.14419, 93.87482, 95.12190, 93.88889, 93.62700, 91.9075…
ggplot(measles_df3,aes(x = enrollments, y = avg_mmr, colour = state, fill = state, label=state))+
  geom_point( ) +
  geom_text(angle = 45)

ggplot(measles_df3,aes(x = type, y = avg_mmr, colour = state, fill = state, label=state))+
  geom_bar(position = "dodge", stat="identity" ) #+

#    geom_text(angle = 45)

Eka Tian #TidyTuesday

# By Eka Tian
# 28/Feb/2020
if (!require('tidyverse')) install.packages('tidyverse')
if (!require('geofacet')) install.packages('geofacet')
if (!require('ggtext')) install.packages('ggtext')
if (!require('hrbrthemes')) install.packages('hrbrthemes') 
if (!require('extrafont')) install.packages('extrafont')
devtools::install_github("wilkelab/ggtext")
#load libraries
library(tidyverse)
library(geofacet)
#library(hrbrthemes)
library(extrafont)
library(skimr)
library(dplyr)
library(ggtext)
# Download the data
measles <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-25/measles.csv')
measles_df3 <- measles %>%
  filter(mmr >0) %>% #removed schools with no reported value (-1)
  mutate(xrel = as.numeric(xrel))%>% # it is showing up as a logical instead of a value
  replace_na(list(xmed = 0, xrel = 0, xper = 0, enroll = 0, overall = 0, mmr = 0)) %>%# replace all NA with 0
  group_by(type,state) %>% # group for visualisation
  summarise(enrollments = sum(enroll), avg_mmr = mean(mmr) ) %>%
  filter(type %in% list('Private','Public'))
glimpse(measles_df3)
## Observations: 14
## Variables: 4
## Groups: type [2]
## $ type        <chr> "Private", "Private", "Private", "Private", "Private", "P…
## $ state       <chr> "Arizona", "California", "Colorado", "Massachusetts", "Ne…
## $ enrollments <dbl> 2943, 60612, 2680, 0, 0, 24309, 4819, 66548, 1172146, 0, …
## $ avg_mmr     <dbl> 89.14419, 93.87482, 95.12190, 93.88889, 93.62700, 91.9075…
ggplot(measles_df3,aes(x = type, y = avg_mmr, colour = state, fill = state, label=state))+
  geom_bar(position = "dodge", stat="identity" ) 

Insights from an experienced R user: Momeneh (Sepideh) Foroutan

Measles is a very contagious respiratory infection, and this tidy tuesday data has vaccination rates for 46,412 schools in 32 states in the US, inluding overall vaccination rate, Measles, Mumps, and Rubella (MMR) vaccination rate, as well percent of student who were exempted form the vaccination due to different reasons.

First we read in the data, and load the tidyverse library.

measles <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-25/measles.csv')

library(tidyverse)

Explore the data

Looking at the dimension, head, structure and summary of the data, gives us some insight about the datasets. Bases on these, we realise that there are -1 in the mmr and overall columns, which we may want to take this into account when visualising the data. You may also have noticed that one of the exemption columns is logical instead of numeric. This also help to see which columns have how many NAs.

dim(measles)
## [1] 66113    16
head(measles)
## # A tibble: 6 x 16
##   index state year  name  type  city  county district enroll   mmr overall xrel 
##   <dbl> <chr> <chr> <chr> <chr> <chr> <chr>  <lgl>     <dbl> <dbl>   <dbl> <lgl>
## 1     1 Ariz… 2018… A J … Publ… Noga… Santa… NA           51   100      -1 NA   
## 2     2 Ariz… 2018… Acad… Char… Tucs… Pima   NA           22   100      -1 NA   
## 3     3 Ariz… 2018… Acad… Char… Tucs… Pima   NA           85   100      -1 NA   
## 4     4 Ariz… 2018… Acad… Char… Phoe… Maric… NA           60   100      -1 NA   
## 5     5 Ariz… 2018… Accl… Char… Phoe… Maric… NA           43   100      -1 NA   
## 6     6 Ariz… 2018… Alfr… Publ… Phoe… Maric… NA           36   100      -1 NA   
## # … with 4 more variables: xmed <dbl>, xper <dbl>, lat <dbl>, lng <dbl>
str(measles)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 66113 obs. of  16 variables:
##  $ index   : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ state   : chr  "Arizona" "Arizona" "Arizona" "Arizona" ...
##  $ year    : chr  "2018-19" "2018-19" "2018-19" "2018-19" ...
##  $ name    : chr  "A J Mitchell Elementary" "Academy Del Sol" "Academy Del Sol - Hope" "Academy Of Mathematics And Science South" ...
##  $ type    : chr  "Public" "Charter" "Charter" "Charter" ...
##  $ city    : chr  "Nogales" "Tucson" "Tucson" "Phoenix" ...
##  $ county  : chr  "Santa Cruz" "Pima" "Pima" "Maricopa" ...
##  $ district: logi  NA NA NA NA NA NA ...
##  $ enroll  : num  51 22 85 60 43 36 24 22 26 78 ...
##  $ mmr     : num  100 100 100 100 100 100 100 100 100 100 ...
##  $ overall : num  -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ xrel    : logi  NA NA NA NA NA NA ...
##  $ xmed    : num  NA NA NA NA 2.33 NA NA NA NA NA ...
##  $ xper    : num  NA NA NA NA 2.33 NA 4.17 NA NA NA ...
##  $ lat     : num  31.3 32.2 32.1 33.5 33.5 ...
##  $ lng     : num  -111 -111 -111 -112 -112 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   index = col_double(),
##   ..   state = col_character(),
##   ..   year = col_character(),
##   ..   name = col_character(),
##   ..   type = col_character(),
##   ..   city = col_character(),
##   ..   county = col_character(),
##   ..   district = col_logical(),
##   ..   enroll = col_double(),
##   ..   mmr = col_double(),
##   ..   overall = col_double(),
##   ..   xrel = col_logical(),
##   ..   xmed = col_double(),
##   ..   xper = col_double(),
##   ..   lat = col_double(),
##   ..   lng = col_double()
##   .. )
summary(measles)
##      index         state               year               name          
##  Min.   :   1   Length:66113       Length:66113       Length:66113      
##  1st Qu.: 429   Class :character   Class :character   Class :character  
##  Median : 997   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :1608                                                           
##  3rd Qu.:2133                                                           
##  Max.   :8066                                                           
##                                                                         
##      type               city              county          district      
##  Length:66113       Length:66113       Length:66113       Mode:logical  
##  Class :character   Class :character   Class :character   NA's:66113    
##  Mode  :character   Mode  :character   Mode  :character                 
##                                                                         
##                                                                         
##                                                                         
##                                                                         
##      enroll            mmr            overall         xrel        
##  Min.   :   0.0   Min.   : -1.00   Min.   : -1.00   Mode:logical  
##  1st Qu.:  46.0   1st Qu.: -1.00   1st Qu.: -1.00   TRUE:109      
##  Median :  80.0   Median : 95.00   Median : 87.00   NA's:66004    
##  Mean   : 131.9   Mean   : 63.17   Mean   : 54.09                 
##  3rd Qu.: 129.0   3rd Qu.: 98.00   3rd Qu.: 96.10                 
##  Max.   :6222.0   Max.   :100.00   Max.   :100.00                 
##  NA's   :16260                                                    
##       xmed             xper             lat             lng         
##  Min.   :  0.04   Min.   :  0.17   Min.   :24.55   Min.   :-124.50  
##  1st Qu.:  1.00   1st Qu.:  2.84   1st Qu.:35.69   1st Qu.:-117.63  
##  Median :  2.00   Median :  5.00   Median :40.21   Median : -89.97  
##  Mean   :  2.91   Mean   :  6.78   Mean   :39.15   Mean   : -96.28  
##  3rd Qu.:  3.53   3rd Qu.:  7.55   3rd Qu.:42.18   3rd Qu.: -81.75  
##  Max.   :100.00   Max.   :169.23   Max.   :49.00   Max.   :  80.21  
##  NA's   :45122    NA's   :57560    NA's   :1549    NA's   :1549

We can also look at the tables of different columns. Looking at this, we see that the year column has some grouping issues, that you may want to change.

selCols <- c("state", "year", "type")
apply(measles[, selCols], 2, table )
## $state
## 
##        Arizona       Arkansas     California       Colorado    Connecticut 
##           1451            567          16098           1509            795 
##        Florida          Idaho       Illinois           Iowa          Maine 
##           2678            475           7686           1370            357 
##  Massachusetts       Michigan      Minnesota       Missouri        Montana 
##           1594           2351           1813            748            645 
##     New Jersey       New York North Carolina   North Dakota           Ohio 
##           2211           4275           2085            387           3165 
##       Oklahoma         Oregon   Pennsylvania   Rhode Island   South Dakota 
##           1249            817           1939            230            390 
##      Tennessee          Texas           Utah        Vermont       Virginia 
##           1152            811            604            349           1468 
##     Washington      Wisconsin 
##           2221           2623 
## 
## $year
## 
##    2017 2017-18 2018-19    null 
##    1939   10418   48075    5681 
## 
## $type
## 
##        BOCES      Charter Kindergarten    Nonpublic      Private       Public 
##           47          276         1488          173         6815        20692

Now, to do some visualisations, lets calculate the average of the vaccination rates and different expemtion columns.

mstat <- measles %>%
  group_by(state) %>%
  mutate(ave_xmed = mean(xmed, na.rm = T),
         ave_xper = mean(xper, na.rm = T),
         ave_xrel = mean(xrel, na.rm = T),
         ave_enroll = mean(enroll, na.rm = T),
         ave_mmr = mean(mmr, na.rm = T),
         ave_overall = mean(overall, na.rm = T)) %>%
  data.frame() 

Visualisation

Barplot

In order to make barplots for the vaccination rate columns, we make sure that we filter out -1 values from that column. As we have calculated the mean vaccination rate values per state, we remove duplicated rows too. Note that we can order the barplots, simply by using the reorder() function.

mstat %>%  
  filter(! duplicated(state) & ave_overall != -1) %>% 
ggplot(., aes(x = reorder(state, ave_overall), y = ave_overall)) + 
  geom_bar(stat = "identity", fill = "gray20") +
  ggtitle("Average overall vaccination across states") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

mstat %>%  
  filter(! duplicated(state) & ave_mmr != -1) %>% 
ggplot(., aes(x = reorder(state, ave_mmr), y = ave_mmr)) + 
  geom_bar(stat = "identity", fill = "gray60") +
  ggtitle("Average MMR vaccination across states") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Now, if we want to know what percent exemption of different reasons we have in each state, we should first change the structure of the data to be in the long format, so that all teh info for different exemptions go into one column. For this, we use pivot_longer() function. The output of this can be directly used in the ggplot() function. Note that we also filter for NaN valuse in the new column “Average_values”.

mstat %>%
  filter(!duplicated(state)) %>%
  pivot_longer(.,
               cols = ave_xmed:ave_xrel,
               names_to = "Exemption",
               values_to = "Average_values") %>%
  filter(!is.nan(Average_values)) %>%
  ggplot(., aes(
    x = reorder(state, ave_mmr),
    y = Average_values,
    fill = Exemption
  )) +
  geom_bar(stat = "identity",
           position = "dodge",
           width = 0.8) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Scatterplot

The other way of looking at the data is to examine associations. For example, there seems to be a positive correlation between overall vaccination rate and mmr, however, there are schools in some states, such as Colorado that have high mmr but generally lower overall vaccination rate.

mstat %>%
  filter(overall != -1 & mmr != -1) %>%
  ggplot(., aes(x = mmr, y = overall, color = state)) +
  geom_point(alpha = 0.6) +
  theme_bw()

Map

We can also examine this data in form of a map. There are several ways of doing this but here I am giving two simple examples to get started. These include using the coord_quickmap() function from ggplot2 and plot_usmap() function from usmap package.

coord_quickmap

Here, we plot all lng and lat, and then because there seem to be some outliers in these data, we focus on only those lng and lat that properly covers the US coordinates. Then we show how to simply color that based on state.

ggplot(mstat, aes(lng, lat)) + 
  geom_point(size = .25, show.legend = FALSE) +
  coord_quickmap()

mstat %>% 
  filter(lng < -60 ) %>% 
ggplot(., aes(lng, lat)) + 
  geom_point(size = .25, show.legend = FALSE) +
  coord_quickmap()

mstat %>% 
  filter(lng < -60 ) %>% 
ggplot(., aes(lng, lat, color = state)) + 
  geom_point(size = .25, show.legend = FALSE) +
  coord_quickmap()

We can also colour the points based on some continuous values, such as mmr or overall. To have a better contrast in the colour, I decided to keep -1 values, however, you can remove it and see what your plot looks like.

mstat %>%
  filter(lng < -60) %>%
  # filter(mmr != -1) %>%
  ggplot(., aes(lng, lat, color = mmr)) +
  geom_point(size = .25, show.legend = TRUE) +
  scale_color_viridis_c(name = "MMR", label = scales::comma) +
  coord_quickmap() +
  theme(legend.position = "right") +
  theme_dark()

mstat %>%
  filter(lng < -60) %>%
  # filter(overall != -1) %>%
  ggplot(., aes(lng, lat, color = overall)) +
  geom_point(size = .25, show.legend = TRUE) +
  scale_color_viridis_c(name = "Overall", label = scales::comma) +
  coord_quickmap() +
  theme(legend.position = "right") +
  theme_dark()

Make it interactive

Now, we subset the data to those that do not have -1 in overall column and color based on the mmr values. We can zoom on different states and make the plots inteactive using the plotly package and ggplotly() function. Uncomment plotly::ggplotly(p, tiptools = "text") in the below code in your computer to see the interactivity.

p <- mstat %>%
  filter(lng < -110) %>%
  filter(overall != -1) %>%
  ggplot(., aes(lng, lat, color = mmr, text = name)) +
  geom_point(size = .25, show.legend = TRUE) +
  scale_color_viridis_c(name = "MMR", label = scales::comma) +
  coord_quickmap() +
  theme(legend.position = "right") +
  theme_dark()

p

# plotly::ggplotly(p, tiptools = "text")

usmap

As I mentioned above, we can also use usmap package to generate maps for the US. To do this, I have been looking at the post here; but there are obviously other relevant posts, such as this that shows how to generate maps using ggplot2, for example using geom_polygon() function.

Note that as we do not have state abbreviations, we use state.name and state.abb from the usmap, merge these to our data and use plot_usmap() to visualise it.

library(usmap)

mstatUniqueState <- mstat[! duplicated(mstat$state), ]
abbr_state <- tibble(state = state.name, abb = state.abb)
  
mstatUniqueState %>% 
  filter(ave_mmr != -1) %>% 
  left_join(abbr_state) %>% 
plot_usmap(
  data = .,
  region = "state",
  values = "ave_mmr",
  color = "purple"
) +
  scale_fill_viridis_c(name = "MMR", label = scales::comma) +
  theme(legend.position = "right") +
  theme(panel.background = element_rect(color = "white", fill = "gray10"))

mstatUniqueState %>% 
  filter(ave_overall != -1) %>% 
  left_join(abbr_state) %>% 
plot_usmap(
  data = .,
  region = "state",
  values = "ave_overall",
  color = "purple"
) +
  scale_fill_viridis_c(name = "Overall", label = scales::comma) +
  theme(legend.position = "right") +
  theme(panel.background = element_rect(color = "white", fill = "gray10"))

Session info

sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Catalina 10.15.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] usmap_0.5.0        skimr_2.1          extrafont_0.17     ggtext_0.1.0      
##  [5] geofacet_0.1.10    psych_1.9.12.31    forcats_0.4.0      stringr_1.4.0     
##  [9] dplyr_0.8.4        purrr_0.3.3        readr_1.3.1        tidyr_1.0.0       
## [13] tibble_2.1.3       ggplot2_3.3.0.9000 tidyverse_1.3.0   
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.1          rnaturalearth_0.1.0 viridisLite_0.3.0  
##  [4] jsonlite_1.6.1      modelr_0.1.5        assertthat_0.2.1   
##  [7] highr_0.8           sp_1.3-2            cellranger_1.1.0   
## [10] yaml_2.2.1          ggrepel_0.8.1       Rttf2pt1_1.3.8     
## [13] pillar_1.4.3        backports_1.1.5     lattice_0.20-38    
## [16] glue_1.3.1          extrafontdb_1.0     digest_0.6.25      
## [19] gridtext_0.1.1      rvest_0.3.5         colorspace_1.4-1   
## [22] htmltools_0.4.0     pkgconfig_2.0.3     broom_0.5.2        
## [25] haven_2.2.0         scales_1.1.0        jpeg_0.1-8.1       
## [28] generics_0.0.2      farver_2.0.3        ellipsis_0.3.0     
## [31] repr_1.1.0          withr_2.1.2         cli_2.0.2          
## [34] mnormt_1.5-6        magrittr_1.5        crayon_1.3.4       
## [37] readxl_1.3.1        evaluate_0.14       fs_1.3.1           
## [40] fansi_0.4.1         nlme_3.1-140        xml2_1.2.2         
## [43] class_7.3-15        tools_3.6.1         imguR_1.0.3        
## [46] hms_0.5.2           lifecycle_0.1.0     munsell_0.5.0      
## [49] geogrid_0.1.1       reprex_0.3.0        compiler_3.6.1     
## [52] e1071_1.7-3         rlang_0.4.4         classInt_0.4-2     
## [55] units_0.6-5         grid_3.6.1          rstudioapi_0.11    
## [58] base64enc_0.1-3     labeling_0.3        rmarkdown_2.1      
## [61] gtable_0.3.0        DBI_1.0.0           curl_4.3           
## [64] R6_2.4.1            gridExtra_2.3       lubridate_1.7.4    
## [67] knitr_1.28          utf8_1.1.4          rgeos_0.5-2        
## [70] KernSmooth_2.23-15  stringi_1.4.6       parallel_3.6.1     
## [73] Rcpp_1.0.3          vctrs_0.2.3         sf_0.8-0           
## [76] png_0.1-7           dbplyr_1.4.2        tidyselect_1.0.0   
## [79] xfun_0.12

More insights from the night!